package com.chance.cc.crawler.development.bootstrap.sina.carSeriesInformation;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.proxy.Proxy;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.*;

/**
 * Development bootstrap that starts a local crawl of Sina car-series news pages.
 *
 * <p>{@link #main(String[])} builds a start request for {@link #START_URL}, attaches a
 * serialized comment de-duplication filter to the request's business tags, and hands the
 * request to a {@link DevCrawlerController} that loads its scripts from
 * {@code com.chance.cc.crawler.development.scripts.sina}.
 */
public class SinaCarSeriesInformationStart {

    /** Domain used for the request, the filter keys, the trigger info and the request queue. */
    private static final String DOMAIN = "sina";

    /** Site name added to the business tags of the start request. */
    private static final String SITE = "carSeriesInformation";

    /** First page that is requested. */
    private static final String START_URL = "https://db.auto.sina.com.cn/news/4373/?page=1";
//    private static final String START_URL = "https://db.auto.sina.com.cn";

    /**
     * Value passed to {@code FilterUtils.dateRangeFilterInfo} for both the article and the
     * comment filter. NOTE(review): presumably a number of hours (300 days) — confirm against
     * FilterUtils.
     */
    private static final int DATE_RANGE = 24 * 300;

    /** Proxy shared by the start request; configured once in the static initializer. */
    private static final Proxy PROXY = new Proxy();

    static {
        // Proxy configuration.
        // NOTE(review): the credentials below are hard-coded in source; consider moving them
        // to external configuration.
        // The two values below were left commented out by the original author
        // (presumably an alternate credential pair — confirm before use):
//        H5168QRFNIU3804D
//        5F6B3610BB719FAA
        PROXY.setHost("http-dyn.abuyun.com");
        PROXY.setPort(9020);
        PROXY.setUsername("HL89Q19E86E2987D");
        PROXY.setPassword("71F33D94CE5F7BF2");
    }

    /**
     * Builds the start request, wires it into a {@link DevCrawlerController} and starts it.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        CrawlerRequestRecord startRecord = buildStartRecord();
        attachCommentFilterInfo(startRecord);

        DevCrawlerController controller = DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN + "_trigger", System.currentTimeMillis(), DOMAIN + "_job")
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN)) // in-memory queue
                .crawlerThreadNum(10)
                .consoleResultPipeline("redis")
                .consoleResultPipeline("kafka")
                .fileResultPipeline("kafka", "D:\\chance_log\\新浪资讯Kafka-8-3_1.log", false)
                .fileResultPipeline("redis", "D:\\chance_log\\新浪资讯Mysql-8-3_2.log", false)
                .requestRecord(startRecord)  // more job
//                .supportRecord(buildKeywordRecord())
                .build("com.chance.cc.crawler.development.scripts.sina");
        controller.start();
    }

    /**
     * Creates the turn-page start request for {@link #START_URL}, filtered by key or date
     * range, routed through {@link #PROXY}, and tagged with the domain and site.
     *
     * @return the configured start request
     */
    private static CrawlerRequestRecord buildStartRecord() {
        CrawlerRequestRecord startRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, CrawlerEnum.CrawlerRequestType.turnPage)
                .domain(DOMAIN)
                .httpUrl(START_URL)
                .recordKey(START_URL)
                .releaseTime(System.currentTimeMillis())
                .filter(CrawlerEnum.CrawlerRecordFilter.keyOrDateRange)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(DOMAIN))
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(DATE_RANGE, null))
                .proxy(PROXY)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();
        startRecord.tagsCreator().bizTags().addDomain(DOMAIN);
        startRecord.tagsCreator().bizTags().addSite(SITE);
        return startRecord;
    }

    /**
     * Adds the comment de-duplication info to the request: a separate filter record is
     * serialized to JSON and stored under the custom business tag
     * {@code comment_record_filter_info}.
     *
     * @param startRecord request whose business tags receive the serialized filter record
     */
    private static void attachCommentFilterInfo(CrawlerRequestRecord startRecord) {
        // Record that only carries filter settings for comments.
        CrawlerRecord commentFilterRecord = new CrawlerRecord();
        // Filter by key or by date range.
        commentFilterRecord.setFilter(CrawlerEnum.CrawlerRecordFilter.keyOrDateRange);
        // In-memory key filter; the key joins `filter` (statically imported), the domain and
        // "comment" with "-".
        commentFilterRecord.addFilterInfo(
                FilterUtils.memoryFilterKeyInfo(StringUtils.joinWith("-", filter, DOMAIN, "comment")));
        // Date-range filter, same range as the start request.
        commentFilterRecord.addFilterInfo(FilterUtils.dateRangeFilterInfo(DATE_RANGE, null));
        // Custom tag holding the serialized filter record.
        startRecord.tagsCreator().bizTags()
                .addCustomKV("comment_record_filter_info", JSON.toJSONString(commentFilterRecord));
    }

    /**
     * Creates the keyword support request. It is only needed when the commented-out
     * {@code supportRecord(...)} line in {@link #main(String[])} is enabled, so it is no longer
     * built on every run.
     *
     * <p>NOTE(review): the request name {@code autohome_series_keyword} and the
     * {@code site=carSeriesVideo} query value differ from this class's domain and site —
     * confirm they are intended.
     *
     * @return the keyword support request
     */
    private static CrawlerRequestRecord buildKeywordRecord() {
        return CrawlerRequestRecord.builder()
                .startPageRequest("autohome_series_keyword", turnPageItem)
                .httpUrl("http://192.168.1.215:9599/v1/meta/" + DOMAIN + "/keys?site=carSeriesVideo")
                .requestLabelTag(supportSource)
                .requestLabelTag(internalDownload)
                .build();
    }
}
